22 May, 2017
suppressWarnings(library(tidyverse))
suppressWarnings(library(magrittr))
suppressWarnings(library(knitr))
suppressWarnings(library(broom))
suppressWarnings(library(arsRtools))
This part is not run here, just shown for Documentation. The resulting tidy data frame has been saved and is loaded in the next section.
code not run only shown for documentation
#!/bin/sh
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1exon1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS exon1 "eGFP" "Ars2,Cbp80,Z18" \
"deeptools_out/Refseqv3q1_exon1_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
cd /home/schmidm/faststorage/ClaudiaI/Effie_RNAseq_bws/Meola_RNAseq_bws/
bash ~/ms_tools/MS_Metagene_Tools/RNAseq_sensitivityX.sh reference-point \
"/home/schmidm/annotations/hg19/RefSeqGRCh37.3/ref_GRCh37.p5_top_level_geq3exons_NMq1exon1.bed" \
"*plus.bw" \
"*minus.bw" \
5000 5000 TSS exon1 "EGFP" "RRP40" \
"deeptools_out/Refseqv3q1_exon1_TSS_sensitivity" "--binSize=50 --numberOfProcessors=4"
matrix files for iasillo: /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out/Refseqv3q1_exon1_TSS_sensitivity_joined.gz
matrix files for meola /Users/schmidm/Documents/other_people_to_and_from/ClaudiaI/Effie_RNAseq_bws/RNAseq_deeptools_out_meola/Refseqv3q1_exon1_TSS_sensitivity_joined.gz
biotype: multiexonic_RefseqNMq1
anchor: TSS
code not run only shown for documentation
bin_values <- arsRtools::load_RNAseq_matrices(deeptools_file_iasillo, deeptools_file_meola)
bin_values_filename <- paste0('../data/RNAseq/RNAseq_bin_values_', biotype, '_', anchor, '.RData')
Saving bin values to file: ../data/RNAseq/RNAseq_bin_values_multiexonic_RefseqNMq1_TSS.RData
code not run only shown for documentation
save(bin_values, file=bin_values_filename)
everything below is run.
load(bin_values_filename, verbose=TRUE)
## Loading objects:
## bin_values
data('RNAseq_value_heatmap_theme', package='arsRtools')
row_order <- bin_values %>%
group_by(id) %>%
summarize(total_value = sum(value)) %>%
ungroup %>%
arrange(total_value) %$%
id
bin_values %>%
filter(value > 0) %>%
mutate(id = factor(id, levels=row_order)) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_value_heatmap_theme +
theme(axis.text.y=element_blank())
total ids considered:
distinct(bin_values, id) %>%
nrow
## [1] 7233
empty:
empty_ids_TSS <- bin_values %>%
group_by(id) %>%
summarize(sum_value = sum(value),
max_value = max(value)) %>%
filter(sum_value == 0 & max_value == 0) %$%
id
length(empty_ids_TSS)
## [1] 704
save empty for later use …
save(empty_ids_TSS, file='../data/RNAseq/Refseq_multi_exonic_NMq1_empty_atTSS_ids.RData')
bin_sensitivities <- arsRtools::RNAseq_sensitivity_matrix(bin_values)
data('RNAseq_lineplot_theme', package='arsRtools')
bin_sensitivities %>%
group_by(rel_pos, siRNA) %>%
do(tidy(t.test(log2(.$value)))) %>%
ggplot(., aes(x=rel_pos, y=estimate, color=siRNA)) +
geom_ribbon(aes(ymin=conf.low, ymax=conf.high, fill=siRNA, color=NULL), alpha=.5) +
geom_line(aes(color=siRNA)) +
ylab('mean log2 fold-change relative control') +
xlab(paste0('bp to ', biotype, ' ', anchor)) +
RNAseq_lineplot_theme +
xlim(-2000,5000)
data('RNAseq_logFC_heatmap_theme', package='arsRtools')
row_order <- bin_sensitivities %>%
group_by(id) %>%
summarize(total_value = sum(value)) %>%
ungroup %>%
arrange(total_value) %$%
id
bin_sensitivities %<>%
mutate(id = factor(id, levels=row_order))
bin_sensitivities %>%
mutate(value = case_when(.$value > 4 ~ 4,
.$value < .25 ~ .25,
TRUE ~ .$value)) %>%
filter( !(id %in% empty_ids_TSS),
value > 0,
rel_pos > -2000, rel_pos < 5000) %>%
ggplot(., aes(x=rel_pos, y=id, fill=log2(value))) +
geom_raster(interpolate = FALSE) +
facet_grid(.~siRNA) +
RNAseq_logFC_heatmap_theme +
theme(axis.text.y=element_blank())
sessionInfo()
## R version 3.2.3 (2015-12-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.11.6 (El Capitan)
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] arsRtools_0.1 RColorBrewer_1.1-2 rtracklayer_1.30.4
## [4] GenomicRanges_1.22.4 GenomeInfoDb_1.6.3 IRanges_2.4.8
## [7] S4Vectors_0.8.11 BiocGenerics_0.16.1 broom_0.4.1
## [10] knitr_1.15.1 magrittr_1.5 dplyr_0.5.0
## [13] purrr_0.2.2 readr_1.0.0 tidyr_0.6.0
## [16] tibble_1.2 ggplot2_2.2.0 tidyverse_1.0.0
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.8 futile.logger_1.4.3
## [3] plyr_1.8.4 XVector_0.10.0
## [5] futile.options_1.0.0 bitops_1.0-6
## [7] zlibbioc_1.16.0 tools_3.2.3
## [9] digest_0.6.10 evaluate_0.10
## [11] gtable_0.2.0 nlme_3.1-128
## [13] lattice_0.20-34 psych_1.6.9
## [15] DBI_0.5-1 stringr_1.1.0
## [17] Biostrings_2.38.4 rprojroot_1.1
## [19] grid_3.2.3 Biobase_2.30.0
## [21] R6_2.2.0 BiocParallel_1.4.3
## [23] XML_3.98-1.5 foreign_0.8-67
## [25] rmarkdown_1.2 lambda.r_1.1.9
## [27] reshape2_1.4.2 GenomicAlignments_1.6.3
## [29] Rsamtools_1.22.0 backports_1.0.4
## [31] scales_0.4.1 htmltools_0.3.5
## [33] SummarizedExperiment_1.0.2 assertthat_0.1
## [35] mnormt_1.5-5 colorspace_1.3-2
## [37] labeling_0.3 stringi_1.1.2
## [39] RCurl_1.95-4.8 lazyeval_0.2.0
## [41] munsell_0.4.3